{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "# Load the test-set ids from a comma-separated text file that is read\n",
    "# without a header row. header=None is the documented way to say so;\n",
    "# a negative header value is rejected by newer pandas releases.\n",
    "test = pd.read_table('data/test/subsidy_final_test.txt', sep=',', header=None)\n",
    "# Name the single column so later cells can refer to test['id'].\n",
    "test.columns = ['id']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def trans(num):\n",
    "    \"\"\"Map a class index to its subsidy amount.\n",
    "\n",
    "    The argument is coerced with int() first. Indices 0, 1, 2 and 3 map to\n",
    "    0, 1000, 1500 and 2000 respectively; any other index yields None.\n",
    "    \"\"\"\n",
    "    amounts = {0: 0, 1: 1000, 2: 1500, 3: 2000}\n",
    "    return amounts.get(int(num))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "## Start"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1000奖学金数量： 2231\n",
      "1500奖学金数量： 691\n",
      "2000奖学金数量： 298\n"
     ]
    }
   ],
   "source": [
    "# Weighted blend (0.7 / 0.3) of two prediction tables loaded from CSV;\n",
    "# judging by the file names these are presumably per-class probabilities.\n",
    "prob1 = pd.read_csv(\n",
    "    \"../education_final_common_top/xgb_prob_model1_seed100_1_0.02783.csv\",\n",
    "    index_col=0)\n",
    "prob2 = pd.read_csv(\n",
    "    \"../education_final_common_top/gbdt_prob_model1_1.csv\",\n",
    "    index_col=0)\n",
    "prob = 0.7 * prob1 + 0.3 * prob2\n",
    "yprob = prob.values\n",
    "# (disabled) pd.DataFrame(prob).to_csv(\"combine_prob_1.csv\")\n",
    "\n",
    "# The position of the largest value in each row is used as the class index.\n",
    "ylabel = yprob.argmax(axis=1)\n",
    "\n",
    "# Pair every test id with its class index, then turn the index into an\n",
    "# amount with trans().\n",
    "pred = pd.DataFrame({'studentid': test['id'],\n",
    "                     'subsidy': ylabel})\n",
    "pred['subsidy'] = pred['subsidy'].apply(trans)\n",
    "# (disabled) pred.to_csv(\"./result1.csv\", index=False)\n",
    "\n",
    "# Print how many rows received each of the non-zero amounts.\n",
    "print \"1000奖学金数量：\", (pred['subsidy'] == 1000).sum()\n",
    "print \"1500奖学金数量：\", (pred['subsidy'] == 1500).sum()\n",
    "print \"2000奖学金数量：\", (pred['subsidy'] == 2000).sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1000奖学金数量： 2399\n",
      "1500奖学金数量： 656\n",
      "2000奖学金数量： 238\n"
     ]
    }
   ],
   "source": [
    "# Weighted blend of five prediction tables loaded from CSV (presumably\n",
    "# per-class probabilities, judging by the file names). The weights sum to 1.\n",
    "# Author's note kept from the original cell: 0.02770 (presumably the score\n",
    "# obtained with this blend; TODO confirm).\n",
    "prob1 = pd.read_csv(\"../education_final_common_top/xgb_prob_model1_seed100_1_0.02783.csv\", index_col=0)\n",
    "prob2 = pd.read_csv(\"../education_final_common_top/gbdt_prob_model1_1.csv\", index_col=0)\n",
    "prob3 = pd.read_csv(\"../education_final_before_xgb/xgb_prob_seed100_1_0.02783.csv\", index_col=0)\n",
    "prob4 = pd.read_csv(\"../education_final_before_gbdt/before_gbdt_prob_1_0.02724.csv\", index_col=0)\n",
    "prob5 = pd.read_csv(\"../education_final_common_top_2/xgb_prob_model1_seed100_1_0.02752.csv\", index_col=0)\n",
    "prob = 0.3*prob1 + 0.3*prob3 + 0.1*prob2 + 0.15*prob4 + 0.15*prob5\n",
    "yprob = prob.values\n",
    "# prob is already a DataFrame, so it can be written out directly.\n",
    "prob.to_csv(\"combine_prob_2.csv\")\n",
    "\n",
    "# The position of the largest value in each row is used as the class index.\n",
    "ylabel = np.argmax(yprob, axis=1)\n",
    "pred = pd.DataFrame({'studentid': test['id'],\n",
    "                     'subsidy': ylabel})\n",
    "# Convert class indices to amounts; trans is passed directly, no lambda needed.\n",
    "pred['subsidy'] = pred['subsidy'].apply(trans)\n",
    "pred.to_csv(\"./result2.csv\", index=False)\n",
    "\n",
    "# Print how many rows received each of the non-zero amounts.\n",
    "print \"1000奖学金数量：\",sum(pred['subsidy'] == 1000)\n",
    "print \"1500奖学金数量：\",sum(pred['subsidy'] == 1500)\n",
    "print \"2000奖学金数量：\",sum(pred['subsidy'] == 2000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1000奖学金数量： 2403\n",
      "1500奖学金数量： 676\n",
      "2000奖学金数量： 258\n"
     ]
    }
   ],
   "source": [
    "# Weighted blend (0.8 / 0.2) of two prediction tables loaded from CSV\n",
    "# (presumably per-class probabilities, judging by the file names).\n",
    "# Author's note kept from the original cell: 0.02738 (presumably the score\n",
    "# obtained with this blend; TODO confirm).\n",
    "prob1 = pd.read_csv(\"../education_final_common_top/xgb_prob_model1_seed100_1_0.02783.csv\", index_col=0)\n",
    "prob3 = pd.read_csv(\"../education_final_before_xgb/xgb_prob_seed100_1_0.02783.csv\", index_col=0)\n",
    "prob = 0.8*prob1 + 0.2*prob3\n",
    "yprob = prob.values\n",
    "# prob is already a DataFrame, so it can be written out directly.\n",
    "prob.to_csv(\"combine_prob_3.csv\")\n",
    "\n",
    "# The position of the largest value in each row is used as the class index.\n",
    "ylabel = np.argmax(yprob, axis=1)\n",
    "pred = pd.DataFrame({'studentid': test['id'],\n",
    "                     'subsidy': ylabel})\n",
    "# Convert class indices to amounts; trans is passed directly, no lambda needed.\n",
    "pred['subsidy'] = pred['subsidy'].apply(trans)\n",
    "pred.to_csv(\"./result3.csv\", index=False)\n",
    "\n",
    "# Print how many rows received each of the non-zero amounts.\n",
    "print \"1000奖学金数量：\",sum(pred['subsidy'] == 1000)\n",
    "print \"1500奖学金数量：\",sum(pred['subsidy'] == 1500)\n",
    "print \"2000奖学金数量：\",sum(pred['subsidy'] == 2000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1000奖学金数量： 2522\n",
      "1500奖学金数量： 726\n",
      "2000奖学金数量： 235\n"
     ]
    }
   ],
   "source": [
    "# Weighted blend (0.2 / 0.8) of two prediction tables loaded from CSV\n",
    "# (presumably per-class probabilities, judging by the file names).\n",
    "#\n",
    "# Author's notes kept from the original cell:\n",
    "#   - 0.02795 (presumably the score obtained with this blend; TODO confirm).\n",
    "#   - This shows xgb_prob_seed100_1_0.02783.csv works a bit better, so make\n",
    "#     it the main component.\n",
    "#   - Also, the 1000 count rises noticeably, the 1500 count rises slightly,\n",
    "#     and the 2000 count drops.\n",
    "prob1 = pd.read_csv(\"../education_final_common_top/xgb_prob_model1_seed100_1_0.02783.csv\", index_col=0)\n",
    "prob3 = pd.read_csv(\"../education_final_before_xgb/xgb_prob_seed100_1_0.02783.csv\", index_col=0)\n",
    "prob = 0.2*prob1 + 0.8*prob3\n",
    "yprob = prob.values\n",
    "# NOTE(review): combine_prob_3.csv and result3.csv are also written by the\n",
    "# 0.8 / 0.2 blend cell, so whichever cell ran last determines their contents.\n",
    "# prob is already a DataFrame, so it can be written out directly.\n",
    "prob.to_csv(\"combine_prob_3.csv\")\n",
    "\n",
    "# The position of the largest value in each row is used as the class index.\n",
    "ylabel = np.argmax(yprob, axis=1)\n",
    "pred = pd.DataFrame({'studentid': test['id'],\n",
    "                     'subsidy': ylabel})\n",
    "# Convert class indices to amounts; trans is passed directly, no lambda needed.\n",
    "pred['subsidy'] = pred['subsidy'].apply(trans)\n",
    "pred.to_csv(\"./result3.csv\", index=False)\n",
    "\n",
    "# Print how many rows received each of the non-zero amounts.\n",
    "print \"1000奖学金数量：\",sum(pred['subsidy'] == 1000)\n",
    "print \"1500奖学金数量：\",sum(pred['subsidy'] == 1500)\n",
    "print \"2000奖学金数量：\",sum(pred['subsidy'] == 2000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1000奖学金数量： 2473\n",
      "1500奖学金数量： 707\n",
      "2000奖学金数量： 220\n"
     ]
    }
   ],
   "source": [
    "# Weighted blend of five prediction tables loaded from CSV (presumably\n",
    "# per-class probabilities, judging by the file names). Nothing is saved here.\n",
    "prob1 = pd.read_csv(\n",
    "    \"../education_final_common_top/xgb_prob_model1_seed100_1_0.02783.csv\",\n",
    "    index_col=0)\n",
    "prob2 = pd.read_csv(\n",
    "    \"../education_final_common_top/gbdt_prob_model1_1.csv\",\n",
    "    index_col=0)\n",
    "prob3 = pd.read_csv(\n",
    "    \"../education_final_before_xgb/xgb_prob_seed100_1_0.02783.csv\",\n",
    "    index_col=0)\n",
    "prob4 = pd.read_csv(\n",
    "    \"../education_final_before_gbdt/before_gbdt_prob_1_0.02724.csv\",\n",
    "    index_col=0)\n",
    "prob5 = pd.read_csv(\n",
    "    \"../education_final_common_top_2/xgb_prob_model1_seed100_1_0.02752.csv\",\n",
    "    index_col=0)\n",
    "prob = 0.16 * prob1 + 0.64 * prob3 + 0.03 * prob2 + 0.08 * prob4 + 0.09 * prob5\n",
    "yprob = prob.values\n",
    "# (disabled) pd.DataFrame(prob).to_csv(\"combine_prob_3.csv\")\n",
    "\n",
    "# The position of the largest value in each row is used as the class index.\n",
    "ylabel = yprob.argmax(axis=1)\n",
    "\n",
    "# Pair every test id with its class index, then turn the index into an\n",
    "# amount with trans().\n",
    "pred = pd.DataFrame({'studentid': test['id'],\n",
    "                     'subsidy': ylabel})\n",
    "pred['subsidy'] = pred['subsidy'].apply(trans)\n",
    "# (disabled) pred.to_csv(\"./result3.csv\", index=False)\n",
    "\n",
    "# Print how many rows received each of the non-zero amounts.\n",
    "print \"1000奖学金数量：\", (pred['subsidy'] == 1000).sum()\n",
    "print \"1500奖学金数量：\", (pred['subsidy'] == 1500).sum()\n",
    "print \"2000奖学金数量：\", (pred['subsidy'] == 2000).sum()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1000奖学金数量： 2443\n",
      "1500奖学金数量： 689\n",
      "2000奖学金数量： 235\n"
     ]
    }
   ],
   "source": [
    "# Weighted blend of five prediction tables loaded from CSV (presumably\n",
    "# per-class probabilities, judging by the file names). Nothing is saved here.\n",
    "prob1 = pd.read_csv(\n",
    "    \"../education_final_common_top/xgb_prob_model1_seed100_1_0.02783.csv\",\n",
    "    index_col=0)\n",
    "prob2 = pd.read_csv(\n",
    "    \"../education_final_common_top/gbdt_prob_model1_1.csv\",\n",
    "    index_col=0)\n",
    "prob3 = pd.read_csv(\n",
    "    \"../education_final_before_xgb/xgb_prob_seed100_1_0.02783.csv\",\n",
    "    index_col=0)\n",
    "prob4 = pd.read_csv(\n",
    "    \"../education_final_before_gbdt/before_gbdt_prob_1_0.02724.csv\",\n",
    "    index_col=0)\n",
    "prob5 = pd.read_csv(\n",
    "    \"../education_final_common_top_2/xgb_prob_model1_seed100_1_0.02752.csv\",\n",
    "    index_col=0)\n",
    "prob = 0.14 * prob1 + 0.56 * prob3 + 0.05 * prob2 + 0.15 * prob4 + 0.1 * prob5\n",
    "yprob = prob.values\n",
    "# (disabled) pd.DataFrame(prob).to_csv(\"combine_prob_3.csv\")\n",
    "\n",
    "# The position of the largest value in each row is used as the class index.\n",
    "ylabel = yprob.argmax(axis=1)\n",
    "\n",
    "# Pair every test id with its class index, then turn the index into an\n",
    "# amount with trans().\n",
    "pred = pd.DataFrame({'studentid': test['id'],\n",
    "                     'subsidy': ylabel})\n",
    "pred['subsidy'] = pred['subsidy'].apply(trans)\n",
    "# (disabled) pred.to_csv(\"./result3.csv\", index=False)\n",
    "\n",
    "# Print how many rows received each of the non-zero amounts.\n",
    "print \"1000奖学金数量：\", (pred['subsidy'] == 1000).sum()\n",
    "print \"1500奖学金数量：\", (pred['subsidy'] == 1500).sum()\n",
    "print \"2000奖学金数量：\", (pred['subsidy'] == 2000).sum()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1000奖学金数量： 2319\n",
      "1500奖学金数量： 652\n",
      "2000奖学金数量： 262\n"
     ]
    }
   ],
   "source": [
    "# Weighted blend of four prediction tables loaded from CSV (presumably\n",
    "# per-class probabilities, judging by the file names). The weights sum to 1.\n",
    "# Author's note kept from the original cell: worse.\n",
    "prob1 = pd.read_csv(\"../education_final_common_top/xgb_prob_model1_seed100_1_0.02783.csv\", index_col=0)\n",
    "prob2 = pd.read_csv(\"../education_final_common_top/gbdt_prob_model1_1.csv\", index_col=0)\n",
    "prob4 = pd.read_csv(\"../education_final_before_gbdt/before_gbdt_prob_1_0.02724.csv\", index_col=0)\n",
    "prob5 = pd.read_csv(\"../education_final_common_top_2/xgb_prob_model1_seed100_1_0.02752.csv\", index_col=0)\n",
    "prob = 0.6*prob1 + 0.1*prob2 + 0.15*prob4 + 0.15*prob5\n",
    "yprob = prob.values\n",
    "# prob is already a DataFrame, so it can be written out directly.\n",
    "prob.to_csv(\"combine_prob_4.csv\")\n",
    "\n",
    "# The position of the largest value in each row is used as the class index.\n",
    "ylabel = np.argmax(yprob, axis=1)\n",
    "pred = pd.DataFrame({'studentid': test['id'],\n",
    "                     'subsidy': ylabel})\n",
    "# Convert class indices to amounts; trans is passed directly, no lambda needed.\n",
    "pred['subsidy'] = pred['subsidy'].apply(trans)\n",
    "pred.to_csv(\"./result4.csv\", index=False)\n",
    "\n",
    "# Print how many rows received each of the non-zero amounts.\n",
    "print \"1000奖学金数量：\",sum(pred['subsidy'] == 1000)\n",
    "print \"1500奖学金数量：\",sum(pred['subsidy'] == 1500)\n",
    "print \"2000奖学金数量：\",sum(pred['subsidy'] == 2000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1000奖学金数量： 2451\n",
      "1500奖学金数量： 739\n",
      "2000奖学金数量： 255\n"
     ]
    }
   ],
   "source": [
    "# Re-blend a previously saved combination (weight 0.85) with one more\n",
    "# prediction table (weight 0.15).\n",
    "# NOTE(review): no visible cell writes combine_prob_3_0.02795.csv; it is\n",
    "# presumably a manually renamed copy of combine_prob_3.csv. Confirm the file\n",
    "# exists before running.\n",
    "prob1 = pd.read_csv(\n",
    "    \"combine_prob_3_0.02795.csv\",\n",
    "    index_col=0)\n",
    "prob4 = pd.read_csv(\n",
    "    \"../education_final_before_gbdt/before_gbdt_prob_1_0.02724.csv\",\n",
    "    index_col=0)\n",
    "prob = 0.85 * prob1 + 0.15 * prob4\n",
    "yprob = prob.values\n",
    "prob.to_csv(\"combine_prob_5.csv\")\n",
    "\n",
    "# The position of the largest value in each row is used as the class index.\n",
    "ylabel = yprob.argmax(axis=1)\n",
    "\n",
    "# Pair every test id with its class index, then turn the index into an\n",
    "# amount with trans() and save the result.\n",
    "pred = pd.DataFrame({'studentid': test['id'],\n",
    "                     'subsidy': ylabel})\n",
    "pred['subsidy'] = pred['subsidy'].apply(trans)\n",
    "pred.to_csv(\"./result5.csv\", index=False)\n",
    "\n",
    "# Print how many rows received each of the non-zero amounts.\n",
    "print \"1000奖学金数量：\", (pred['subsidy'] == 1000).sum()\n",
    "print \"1500奖学金数量：\", (pred['subsidy'] == 1500).sum()\n",
    "print \"2000奖学金数量：\", (pred['subsidy'] == 2000).sum()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
